Publications

Shen, Y., Feng, C., Yang, Y., Tian, D., "Mining Point Cloud Local Structures by Kernel Correlation and Graph Pooling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018.
BibTeX TR2018-041 PDF Software
- @inproceedings{Shen2018jun,
- author = {Shen, Yiru and Feng, Chen and Yang, Yaoqing and Tian, Dong},
- title = {Mining Point Cloud Local Structures by Kernel Correlation and Graph Pooling},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2018,
- month = jun,
- url = {https://www.merl.com/publications/TR2018-041}
- }
Yang, Y., Feng, C., Shen, Y., Tian, D., "FoldingNet: Point Cloud Auto-encoder via Deep Grid Deformation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2018.00029, June 2018.
BibTeX TR2018-042 PDF Software
- @inproceedings{Yang2018jun,
- author = {Yang, Yaoqing and Feng, Chen and Shen, Yiru and Tian, Dong},
- title = {FoldingNet: Point Cloud Auto-encoder via Deep Grid Deformation},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2018,
- month = jun,
- doi = {10.1109/CVPR.2018.00029},
- url = {https://www.merl.com/publications/TR2018-042}
- }
Zhang, Z., Wu, Y., Wang, G., "BPGrad: Towards Global Optimality in Deep Learning via Branch and Pruning", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018, pp. 3301-3309.
BibTeX TR2018-068 PDF
- @inproceedings{Zhang2018jun,
- author = {Zhang, Ziming and Wu, Yuanwei and Wang, Guanghui},
- title = {BPGrad: Towards Global Optimality in Deep Learning via Branch and Pruning},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2018,
- pages = {3301--3309},
- month = jun,
- url = {https://www.merl.com/publications/TR2018-068}
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "Nonlinear Equalization with Deep Learning for Multi-Purpose Visual MIMO Communications", IEEE International Conference on Communications (ICC), DOI: 10.1109/ICC.2018.8422544, May 2018.
BibTeX TR2018-039 PDF
- @inproceedings{Fujihashi2018may,
- author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
- title = {Nonlinear Equalization with Deep Learning for Multi-Purpose Visual MIMO Communications},
- booktitle = {IEEE International Conference on Communications (ICC)},
- year = 2018,
- month = may,
- doi = {10.1109/ICC.2018.8422544},
- url = {https://www.merl.com/publications/TR2018-039}
- }
Settle, S., Le Roux, J., Hori, T., Watanabe, S., Hershey, J.R., "End-to-End Multi-Speaker Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461893, April 2018, pp. 4819-4823.
BibTeX TR2018-001 PDF Video
- @inproceedings{Settle2018apr,
- author = {Settle, Shane and Le Roux, Jonathan and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
- title = {End-to-End Multi-Speaker Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2018,
- pages = {4819--4823},
- month = apr,
- doi = {10.1109/ICASSP.2018.8461893},
- url = {https://www.merl.com/publications/TR2018-001}
- }
Wen, B., Kamilov, U., Liu, D., Mansour, H., Boufounos, P.T., "DeepCASD: An End-to-End Approach for Multi-Spectral Image Super-Resolution", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8461795, April 2018, pp. 6503-6507.
BibTeX TR2018-009 PDF
- @inproceedings{Wen2018apr,
- author = {Wen, Bihan and Kamilov, Ulugbek and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T.},
- title = {DeepCASD: An End-to-End Approach for Multi-Spectral Image Super-Resolution},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2018,
- pages = {6503--6507},
- month = apr,
- doi = {10.1109/ICASSP.2018.8461795},
- url = {https://www.merl.com/publications/TR2018-009}
- }
Zhang, Z., Xu, W., Sullivan, A., "Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning", arXiv, March 2019.
BibTeX arXiv
- @article{Zhang2018mar,
- author = {Zhang, Ziming and Xu, Wenju and Sullivan, Alan},
- title = {Time-Delay Momentum: A Regularization Perspective on the Convergence and Generalization of Stochastic Momentum for Deep Learning},
- journal = {arXiv},
- year = 2019,
- month = mar,
- url = {https://arxiv.org/abs/1903.00760}
- }
Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
BibTeX TR2017-192 PDF
- @article{Ochiai2017oct2,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
- title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1274--1288},
- month = oct,
- doi = {10.1109/JSTSP.2017.2764276},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-192}
- }
Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
BibTeX TR2017-190 PDF Video
- @article{Watanabe2017oct,
- author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
- title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1240--1253},
- month = oct,
- doi = {10.1109/JSTSP.2017.2763455},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-190}
- }
Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
BibTeX TR2017-132 PDF Video
- @inproceedings{Hori2017aug,
- author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
- title = {Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM},
- booktitle = {Interspeech},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-132}
- }
Abbeloos, W., Caccamo, S., Ataer-Cansizoglu, E., Taguchi, Y., Feng, C., Lee, T.-Y., "Detecting and Grouping Identical Objects for Region Proposal and Classification", CVPR Workshop on Deep Learning for Robotic Vision, DOI: 10.1109/CVPRW.2017.76, July 2017.
BibTeX TR2017-099 PDF
- @inproceedings{Abbeloos2017jul,
- author = {Abbeloos, Wim and Caccamo, Sergio and Ataer-Cansizoglu, Esra and Taguchi, Yuichi and Feng, Chen and Lee, Teng-Yok},
- title = {Detecting and Grouping Identical Objects for Region Proposal and Classification},
- booktitle = {CVPR Workshop on Deep Learning for Robotic Vision},
- year = 2017,
- month = jul,
- doi = {10.1109/CVPRW.2017.76},
- url = {https://www.merl.com/publications/TR2017-099}
- }
Yu, Z., Feng, C., Liu, M.-Y., Ramalingam, S., "CASENet: Deep Category-Aware Semantic Edge Detection", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2017.191, July 2017.
BibTeX TR2017-100 PDF Video Data Software
- @inproceedings{Yu2017jul,
- author = {Yu, Zhiding and Feng, Chen and Liu, Ming-Yu and Ramalingam, Srikumar},
- title = {CASENet: Deep Category-Aware Semantic Edge Detection},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2017,
- month = jul,
- doi = {10.1109/CVPR.2017.191},
- url = {https://www.merl.com/publications/TR2017-100}
- }
Feng, C., Liu, M.-Y., Kao, C.-C., Lee, T.-Y., "Deep Active Learning for Civil Infrastructure Defect Detection and Classification", International Workshop on Computing in Civil Engineering (IWCCE), June 2017.
BibTeX TR2017-034 PDF
- @inproceedings{Feng2017jun,
- author = {Feng, Chen and Liu, Ming-Yu and Kao, Chieh-Chi and Lee, Teng-Yok},
- title = {Deep Active Learning for Civil Infrastructure Defect Detection and Classification},
- booktitle = {International Workshop on Computing in Civil Engineering (IWCCE)},
- year = 2017,
- month = jun,
- url = {https://www.merl.com/publications/TR2017-034}
- }
Farahmand, A.-M., Nabi, S., Nikovski, D.N., "Deep Reinforcement Learning for Partial Differential Equation Control", American Control Conference (ACC), DOI: 10.23919/ACC.2017.7963427, May 2017.
BibTeX TR2017-063 PDF
- @inproceedings{Farahmand2017may,
- author = {Farahmand, Amir-massoud and Nabi, Saleh and Nikovski, Daniel N.},
- title = {Deep Reinforcement Learning for Partial Differential Equation Control},
- booktitle = {American Control Conference (ACC)},
- year = 2017,
- month = may,
- doi = {10.23919/ACC.2017.7963427},
- url = {https://www.merl.com/publications/TR2017-063}
- }
Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-010 PDF
- @inproceedings{Luo2017mar,
- author = {Luo, Yi and Chen, Zhuo and Hershey, John R. and Le Roux, Jonathan and Mesgarani, Nima},
- title = {Deep Clustering and Conventional Networks for Music Separation: Strong Together},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-010}
- }
Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-012 PDF
- @inproceedings{Meng2017mar,
- author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
- title = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-012}
- }
Hara, K., Liu, M.-Y., Tuzel, C.O., Farahmand, A.-M., "Attentional Network for Visual Object Detection", arXiv, January 2017.
BibTeX arXiv
- @article{Hara2017jan,
- author = {Hara, Kota and Liu, Ming-Yu and Tuzel, C. Oncel and Farahmand, Amir-massoud},
- title = {Attentional Network for Visual Object Detection},
- journal = {arXiv},
- year = 2017,
- month = jan,
- url = {https://arxiv.org/abs/1702.01478}
- }
Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2016.7820724, December 2016.
BibTeX TR2016-162 PDF
- @inproceedings{Xiao2016dec,
- author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
- title = {Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition},
- booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
- year = 2016,
- month = dec,
- doi = {10.1109/APSIPA.2016.7820724},
- url = {https://www.merl.com/publications/TR2016-162}
- }
Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation", Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
BibTeX TR2016-113 PDF
- @techreport{LeRoux2016sep,
- author = {Le Roux, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
- title = {Learning-Based Approaches to Speech Enhancement and Separation},
- institution = {Mitsubishi Electric Research Laboratories},
- number = {TR2016-113},
- note = {Interspeech Tutorials},
- year = 2016,
- month = sep,
- url = {https://www.merl.com/publications/TR2016-113}
- }
Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/Interspeech.2016-1176, September 2016, pp. 545-549.
BibTeX TR2016-073 PDF
- @inproceedings{Isik2016sep,
- author = {Isik, Yusuf and Le Roux, Jonathan and Chen, Zhuo and Watanabe, Shinji and Hershey, John R.},
- title = {Single-Channel Multi-Speaker Separation using Deep Clustering},
- booktitle = {Interspeech},
- year = 2016,
- pages = {545--549},
- month = sep,
- doi = {10.21437/Interspeech.2016-1176},
- url = {https://www.merl.com/publications/TR2016-073}
- }
Kamilov, U., Mansour, H., "Learning MMSE Optimal Thresholds for FISTA", International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST), August 2016.
BibTeX TR2016-111 PDF
- @inproceedings{Kamilov2016aug,
- author = {Kamilov, Ulugbek and Mansour, Hassan},
- title = {Learning MMSE Optimal Thresholds for FISTA},
- booktitle = {International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST)},
- year = 2016,
- month = aug,
- url = {https://www.merl.com/publications/TR2016-111}
- }
Son, K., Liu, M.-Y., Taguchi, Y., "Learning to Remove Multipath Distortions in Time-of-Flight Range Images for a Robotic Arm Setup", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA.2016.7487515, May 2016, pp. 3390-3397.
BibTeX TR2016-036 PDF
- @inproceedings{Son2016may,
- author = {Son, Kilho and Liu, Ming-Yu and Taguchi, Yuichi},
- title = {Learning to Remove Multipath Distortions in Time-of-Flight Range Images for a Robotic Arm Setup},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- year = 2016,
- pages = {3390--3397},
- month = may,
- doi = {10.1109/ICRA.2016.7487515},
- url = {https://www.merl.com/publications/TR2016-036}
- }
Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
BibTeX TR2016-003 PDF
- @inproceedings{Hershey2016mar,
- author = {Hershey, John R. and Chen, Zhuo and Le Roux, Jonathan and Watanabe, Shinji},
- title = {Deep Clustering: Discriminative Embeddings for Segmentation and Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2016,
- pages = {31--35},
- month = mar,
- doi = {10.1109/ICASSP.2016.7471631},
- url = {https://www.merl.com/publications/TR2016-003}
- }
Liu, M.-Y., Mallya, A., Tuzel, C.O., Chen, X., "Unsupervised Network Pretraining via Encoding Human Design", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV.2016.7477698, March 2016, pp. 1-9.
BibTeX TR2016-022 PDF
- @inproceedings{Liu2016mar,
- author = {Liu, Ming-Yu and Mallya, Arun and Tuzel, C. Oncel and Chen, Xi},
- title = {Unsupervised Network Pretraining via Encoding Human Design},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year = 2016,
- pages = {1--9},
- month = mar,
- doi = {10.1109/WACV.2016.7477698},
- url = {https://www.merl.com/publications/TR2016-022}
- }
Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, pp. 3541.
BibTeX TR2015-099 PDF
- @inproceedings{Tachioka2015sep,
- author = {Tachioka, Y. and Watanabe, S.},
- title = {Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features},
- booktitle = {Interspeech},
- year = 2015,
- pages = 3541,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-099}
- }